/*
   Session setup for the collapse script below.
   Switches the command delimiter to the semicolon, empties the session,
   and raises the memory, variable-count and matrix-size limits before
   any data are loaded.
*/
#delimit;
/* From here on every command must end with a semicolon. */
clear all;
/* NOTE(review): set mem is only meaningful on older Stata releases and
   newer releases are expected to ignore it. Confirm the target version
   before removing it. */
set mem 2000m;
set maxvar 30000;
set matsize 10000;




/*
   Collapse zipped per-year topic-share files to one row per
   country / countryid / year and save the result.

   Loop parameters (each list currently holds a single value):
     k    alpha code used in the file name
     l    beta code used in the file name
     w    selection label used in the file name
     v    paper label used in the file name
     j    number of topics, theta0 up to theta(j-1) are expected in the data
     time year suffix of the input file, 1995 through 2015

   For every input archive that exists the script
     1. unzips and loads the .dta file, then erases the unzipped copy
     2. creates year from date when year is not already present
     3. builds token-weighted topic shares rel_theta within countryid-year
     4. collapses to country-year: mean of theta, sums of rel_theta,
        tokens and obs
     5. builds smoothed shares ste_theta using the alpha value mapped from k
     6. rescales tokens to a share of the yearly total and saves the file
        with the suffix _collapsed followed by the year
   Archives that do not exist are skipped.
*/
*foreach k in 10 {;
foreach k in 3 {;

	foreach l in 001 {;

		foreach w in both {;

			foreach v in all {;

				*foreach j in 5 10 15 25 30 {;
				foreach j in 15 {;

					/* Index of the last topic variable. Depends only on j, so it is
					   computed once here instead of once per year. */
					local s = `j' - 1;

					/* Translate the alpha file-name code into the numeric value used
					   for smoothing. The macro is cleared first so that a value left
					   over from a previous k can never be reused by accident. */
					local real_alpha;
					if inlist("`k'", "3", "0") {;
						local real_alpha = 50/`j';
					};
					else if "`k'" == "10" {;
						local real_alpha 10;
					};
					else if "`k'" == "5" {;
						local real_alpha 5;
					};
					else if "`k'" == "2" {;
						local real_alpha 2;
					};
					else if "`k'" == "1" {;
						local real_alpha 1;
					};
					else if "`k'" == "05" {;
						local real_alpha 0.5;
					};
					else if "`k'" == "025" {;
						local real_alpha 0.25;
					};
					else if "`k'" == "01" {;
						local real_alpha 0.1;
					};

					forval time = 1995/2015 {;

						display  "thetas`j'_alpha`k'_beta`l'_`v'_`w'";

						/* Only process years for which a zipped input file exists. */
						capture confirm file "thetas`j'_alpha`k'_beta`l'_`v'_`w'`time'.dta.zip";

						display _rc;

						if _rc==0 {;

							/* Stop with a clear message when k has no alpha mapping,
							   rather than failing later with a syntax error. */
							if "`real_alpha'" == "" {;
								display as error "no alpha value defined for alpha code `k'";
								exit 198;
							};

							display  "thetas`j'_alpha`k'_beta`l'_`v'_`w'";

							*use "thetas`j'_alpha`k'_beta`l'_`v'_`w'`time'.dta", clear;
							/* replace lets the unzip succeed even if an earlier aborted
							   run left the unzipped .dta behind. */
							unzipfile "thetas`j'_alpha`k'_beta`l'_`v'_`w'`time'.dta.zip", replace;
							use "thetas`j'_alpha`k'_beta`l'_`v'_`w'`time'.dta", clear;
							erase "thetas`j'_alpha`k'_beta`l'_`v'_`w'`time'.dta";

							/* Create year only when the file does not already carry it.
							   NOTE(review): assumes date is numeric in YYYYMMDD form,
							   confirm against the input files. */
							capture confirm variable year;
							if _rc {;
								gen year = int(date/10000);
							};

							sort countryid year;

							/* Share of each observation in its countryid-year token total. */
							by countryid year: egen total_tokens = total(tokens);

							quietly gen rel_token = tokens/total_tokens;

							/* Token-weighted contribution of each observation to each topic. */
							forval t = 0/`s' {;

								quietly gen rel_theta`t' = theta`t' * rel_token;

							};

							gen obs = 1;

							/* Means of theta, sums of rel_theta, tokens and obs.
							   Variables not listed here, including total_tokens and
							   rel_token, are dropped by collapse. */
							collapse theta* (sum) rel_theta* tokens obs, by(country countryid year);

							/* Record the run parameters in the output. */
							quietly gen topics = `j';
							quietly gen beta = "`l'";
							quietly gen alpha = "`k'";
							quietly gen paper = "`v'";
							quietly gen selected = "`w'";

							forval t = 0/`s' {;

								/* A sum over all-missing values is 0, so restore missing
								   where the mean of theta is missing. */
								quietly replace rel_theta`t' = . if  theta`t' == .;

								/* Smoothed share: mean theta times tokens plus alpha,
								   over tokens plus topics times alpha. */
								quietly gen words`t' = theta`t' * tokens;
								quietly gen ste_theta`t' = (words`t'+`real_alpha')/(tokens+`j'*`real_alpha');
								drop words`t';

								drop  theta`t';

							};

							/* Rescale tokens, then express them as a share of the total
							   over all countries in the same year. */
							quietly replace tokens = tokens/1000;
							sort year;
							by year: egen total_tokens = total(tokens);
							tsset countryid year;
							replace tokens = tokens/total_tokens;
							/* Fill a missing share with the previous year's value of the
							   same countryid when that value is available. */
							replace tokens = L1.tokens if tokens==.&L1.tokens!=.;
							quietly gen tokens_sq = tokens*tokens;

							save "thetas`j'_alpha`k'_beta`l'_`v'_`w'_collapsed`time'.dta", replace;
						};

					};

				};
			};
		};
	};
};
